Edited by Shenfeng Qiu 07232022; v8 only used for generating knitr html file

# [You have to change] set this following work directory to your file location
setwd("D:/Bioinformatics Projects/LindseyAanalysis/single_cell/two_brain_groups/")
library(Seurat)
## Attaching SeuratObject
## Attaching sp
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(cowplot)
library(ggplot2)
library(rstatix)
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
## 
##     filter
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.12.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##   genomic data. Bioinformatics 2016.
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(circlize)
## ========================================
## circlize version 0.4.15
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================
library(SingleCellExperiment)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:rstatix':
## 
##     desc
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:sp':
## 
##     %over%
## The following object is masked from 'package:grDevices':
## 
##     windows
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
## 
## Attaching package: 'SummarizedExperiment'
## The following object is masked from 'package:SeuratObject':
## 
##     Assays
## The following object is masked from 'package:Seurat':
## 
##     Assays
library(patchwork)
## 
## Attaching package: 'patchwork'
## The following object is masked from 'package:cowplot':
## 
##     align_plots
library(SingleR)
library(celldex)
## 
## Attaching package: 'celldex'
## The following objects are masked from 'package:SingleR':
## 
##     BlueprintEncodeData, DatabaseImmuneCellExpressionData,
##     HumanPrimaryCellAtlasData, ImmGenData, MonacoImmuneData,
##     MouseRNAseqData, NovershternHematopoieticData
wd <- list()
wd$data <- "D:/Bioinformatics Projects/LindseyAanalysis/single_cell/slim_counts/"
wd$output <- "D:/Bioinformatics Projects/LindseyAanalysis/single_cell/two_brain_groups/v8_output/"

Import Group samples

# You might need to change the location "slim_counts/", to direct to your data folder
for (file in c("m09",
               "m10",
               "m11",
               "m12")){
  seurat_data <- Read10X(data.dir = paste0("D:/Bioinformatics Projects/LindseyAanalysis/single_cell/slim_counts/", file))
  seurat_obj <- CreateSeuratObject(counts = seurat_data, 
                                   min.features = 100, 
                                   project = file)
  assign(file, seurat_obj)
  
               }
m09 <- AddMetaData(object = m09, metadata = "m09", col.name = "sample.id")
m10 <- AddMetaData(object = m10, metadata = "m10", col.name = "sample.id") 
m11 <- AddMetaData(object = m11, metadata = "m11", col.name = "sample.id")
m12 <- AddMetaData(object = m12, metadata = "m12", col.name = "sample.id")
m09 <- AddMetaData(object = m09, metadata = "WT", col.name = "group.id") 
m10 <- AddMetaData(object = m10, metadata = "WT", col.name = "group.id")
m11 <- AddMetaData(object = m11, metadata = "KO", col.name = "group.id") 
m12 <- AddMetaData(object = m12, metadata = "KO", col.name = "group.id")

gallitano <- merge(m09, y = c(m10,m11,m12), 
                   add.cell.ids = c("m09","m10","m11", "m12"), 
                   project = "two_groups")
gallitano[["percent.mt"]] <- PercentageFeatureSet(gallitano, pattern = "^mt-") 

rm(m09, m10, m11, m12)
rm(seurat_obj, seurat_data)
head(gallitano)
##                        orig.ident nCount_RNA nFeature_RNA sample.id group.id
## m09_AAACCCACAAAGACTA-1        m09       2042          993       m09       WT
## m09_AAACCCACAACCGATT-1        m09      13933         4279       m09       WT
## m09_AAACCCACACTGCGTG-1        m09       3407         2057       m09       WT
## m09_AAACCCATCGACATTG-1        m09      19957         5090       m09       WT
## m09_AAACCCATCTCGTTTA-1        m09      10917         3830       m09       WT
## m09_AAACGAAAGTACAGAT-1        m09       8316         3191       m09       WT
## m09_AAACGAAAGTCACGAG-1        m09       1004          620       m09       WT
## m09_AAACGAACAGAACATA-1        m09      15571         4718       m09       WT
## m09_AAACGAAGTGGTTTAC-1        m09       4293         1752       m09       WT
## m09_AAACGAATCAGTGATC-1        m09      44495         7519       m09       WT
##                        percent.mt
## m09_AAACCCACAAAGACTA-1  11.361410
## m09_AAACCCACAACCGATT-1   8.433216
## m09_AAACCCACACTGCGTG-1   6.750807
## m09_AAACCCATCGACATTG-1  11.564864
## m09_AAACCCATCTCGTTTA-1   6.531098
## m09_AAACGAAAGTACAGAT-1   4.882155
## m09_AAACGAAAGTCACGAG-1  15.637450
## m09_AAACGAACAGAACATA-1  12.080149
## m09_AAACGAAGTGGTTTAC-1  16.887957
## m09_AAACGAATCAGTGATC-1   3.577930
tail(gallitano)
##                        orig.ident nCount_RNA nFeature_RNA sample.id group.id
## m12_TTTGGTTAGGTTCATC-1        m12       1290          639       m12       KO
## m12_TTTGGTTCACCAGCGT-1        m12       5415         2483       m12       KO
## m12_TTTGGTTCAGGTATGG-1        m12       6148         2765       m12       KO
## m12_TTTGGTTGTCCTGTTC-1        m12        542          396       m12       KO
## m12_TTTGGTTGTGAGGCAT-1        m12        618          459       m12       KO
## m12_TTTGTTGAGGAGCTGT-1        m12       1841         1056       m12       KO
## m12_TTTGTTGCAAGTACCT-1        m12       4701         2242       m12       KO
## m12_TTTGTTGGTAGGACCA-1        m12       2935         1647       m12       KO
## m12_TTTGTTGTCCGACGGT-1        m12      13358         2314       m12       KO
## m12_TTTGTTGTCTTCTAAC-1        m12       7137         3542       m12       KO
##                        percent.mt
## m12_TTTGGTTAGGTTCATC-1  29.302326
## m12_TTTGGTTCACCAGCGT-1   2.271468
## m12_TTTGGTTCAGGTATGG-1   2.569941
## m12_TTTGGTTGTCCTGTTC-1   6.457565
## m12_TTTGGTTGTGAGGCAT-1   1.456311
## m12_TTTGTTGAGGAGCTGT-1   4.454101
## m12_TTTGTTGCAAGTACCT-1   3.531164
## m12_TTTGTTGGTAGGACCA-1   1.635434
## m12_TTTGTTGTCCGACGGT-1   1.235215
## m12_TTTGTTGTCTTCTAAC-1   2.101723
table(gallitano@meta.data$sample.id)
## 
##   m09   m10   m11   m12 
##  5194 15772  9932  4357
# m09   m10   m11   m12 
# 5194 15772  9932  4357 
table(gallitano@meta.data$group.id)
## 
##    KO    WT 
## 14289 20966
# KO    WT 
#14289 20966 

Quality Control

Plot Data

VlnPlot(gallitano, 
        features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), 
        group.by = "sample.id",
        ncol = 1) + ggtitle("Quality assessment before filtering")

plot1 <- FeatureScatter(gallitano, feature1 = "nCount_RNA", feature2 = "percent.mt") 
line.data <- data.frame(yintercept = c(1500, 7500), Lines = c("lower", "upper"))
line2.data <- data.frame(xintercept = c(500), Lines = c("lower"))

plot2 <- FeatureScatter(gallitano, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_hline(aes(yintercept = yintercept, linetype = Lines), line.data) + geom_vline(aes(xintercept = xintercept, linetype = Lines), line2.data)

plot1 + plot2

Joint filtering effects:

# joint filtering effect
# Visualize the correlation between genes detected and number of UMIs and determine whether strong presence of cells with low numbers of genes/UMIs
metadata <- gallitano@meta.data

metadata %>% 
    ggplot(aes(x=nCount_RNA, y=nFeature_RNA, color=percent.mt)) + 
    geom_point() + 
    scale_colour_gradient(low = "gray90", high = "black") +
    stat_smooth(method=lm) +
    scale_x_log10() + 
    scale_y_log10() + 
    theme_classic() +
    geom_vline(xintercept = 500) +
    geom_hline(yintercept = c(1500, 7500))
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

# After subseting, I also remove the genes with zero count in more than 100 cells

gallitano <- subset(gallitano, nFeature_RNA < 7500 & 
                      nFeature_RNA>1500 & nCount_RNA > 2000 & 
                      percent.mt < 20)

# Output a logical vector for every gene on whether the more than zero counts per cell
# Extract counts
counts <- GetAssayData(object = gallitano, slot = "counts")
# Output a logical vector for every gene on whether there more than zero counts per cell
nonzero <- counts > 0
# Sums all TRUE values and returns TRUE if more than 100 TRUE values per gene
keep_genes <- Matrix::rowSums(nonzero) >= 100
# Only keeping those genes expressed in more than 100 cells
filtered_counts <- counts[keep_genes, ]
# Reassign to filtered Seurat object
gallitano <- CreateSeuratObject(filtered_counts, meta.data = gallitano@meta.data)

Reaccess quality metrics

dim(gallitano)
## [1] 14377 11424
# 14377 genes x 11424 cells (or UMIs)

table(gallitano@meta.data$sample.id)
## 
##  m09  m10  m11  m12 
## 1966 3633 3896 1929
# m09  m10  m11  m12 
# 1966 3633 3896 1929

VlnPlot(gallitano, 
        features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), 
        ncol = 1) + ggtitle("Quality assessment after filtering")

plot1 <- FeatureScatter(gallitano, feature1 = "nCount_RNA", feature2 = "percent.mt") 
line.data <- data.frame(yintercept = c(1500, 7500), Lines = c("lower", "upper"))
line2.data <- data.frame(xintercept = c(500), Lines = c("lower"))

plot2 <- FeatureScatter(gallitano, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_hline(aes(yintercept = yintercept, linetype = Lines), line.data) + geom_vline(aes(xintercept = xintercept, linetype = Lines), line2.data)

plot1 + plot2

metadata <- gallitano@meta.data

metadata %>% 
    ggplot(aes(x=nCount_RNA, y=nFeature_RNA, color=percent.mt)) + 
    geom_point() + 
    scale_colour_gradient(low = "gray90", high = "black") +
    stat_smooth(method=lm) +
    scale_x_log10() + 
    scale_y_log10() + 
    theme_classic() +
    geom_vline(xintercept = 500) +
    geom_hline(yintercept = c(1500, 7500))
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

Apply sctransform normalization

options(future.globals.maxSize = 9000 * 1024^2) #100 gb

# Split seurat object by condition to perform cell cycle scoring and SCT on all samples
split_seurat <- SplitObject(gallitano, split.by = "sample.id")

split_seurat <- split_seurat[c("m09","m10", "m11", "m12")]

for (i in 1:length(split_seurat)) {
  split_seurat[[i]] <- NormalizeData(split_seurat[[i]], verbose = TRUE)
    split_seurat[[i]] <- SCTransform(split_seurat[[i]],method = "glmGamPoi", vars.to.regress = c("percent.mt"))
}
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 14376 by 1966
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 1966 cells
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## Found 43 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 14376 genes
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Computing corrected count matrix for 14376 genes
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 35.23748 secs
## Determine variable features
## Place corrected count matrix in counts slot
## Regressing out percent.mt
## Centering data matrix
## Set default assay to SCT
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 14377 by 3633
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 3633 cells
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## Found 48 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 14377 genes
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Computing corrected count matrix for 14377 genes
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 58.54576 secs
## Determine variable features
## Place corrected count matrix in counts slot
## Regressing out percent.mt
## Centering data matrix
## Set default assay to SCT
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 14377 by 3896
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 3896 cells
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## Found 74 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 14377 genes
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Computing corrected count matrix for 14377 genes
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 1.096737 mins
## Determine variable features
## Place corrected count matrix in counts slot
## Regressing out percent.mt
## Centering data matrix
## Set default assay to SCT
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 14377 by 1929
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 1929 cells
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## Found 56 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 14377 genes
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Computing corrected count matrix for 14377 genes
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 31.74446 secs
## Determine variable features
## Place corrected count matrix in counts slot
## Regressing out percent.mt
## Centering data matrix
## Set default assay to SCT
rm(gallitano)

Integration

# Select the most variable features to use for integration
integ_features <- SelectIntegrationFeatures(object.list = split_seurat, 
                                            nfeatures = 3000) 
# Prepare the SCT list object for integration
split_seurat <- PrepSCTIntegration(object.list = split_seurat, 
                                   anchor.features = integ_features)
# Find best buddies - can take a while to run
integ_anchors <- FindIntegrationAnchors(object.list = split_seurat, 
                                        normalization.method = "SCT", 
                                        anchor.features = integ_features)
## Finding all pairwise anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 5438 anchors
## Filtering anchors
##  Retained 5090 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 6101 anchors
## Filtering anchors
##  Retained 5797 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 7857 anchors
## Filtering anchors
##  Retained 7257 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 4657 anchors
## Filtering anchors
##  Retained 4571 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 5532 anchors
## Filtering anchors
##  Retained 5454 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 5960 anchors
## Filtering anchors
##  Retained 5864 anchors
# Integrate across conditions
seurat_integrated <- IntegrateData(anchorset = integ_anchors, 
                                   normalization.method = "SCT")
## Merging dataset 4 into 3
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 1 into 2
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 2 1 into 3 4
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
rm(integ_anchors)
rm(integ_features)
rm(split_seurat)

Clustering

# Run the standard workflow for visualization and clustering
seurat_integrated <- RunPCA(object = seurat_integrated, npcs = 40, verbose = F)
ElbowPlot(seurat_integrated, ndims = 40)

PCAPlot(seurat_integrated)  

seurat_integrated <- RunUMAP(seurat_integrated, 
                             dims = 1:40,
                             reduction = "pca")
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 13:40:43 UMAP embedding parameters a = 0.9922 b = 1.112
## 13:40:44 Read 11424 rows and found 40 numeric columns
## 13:40:44 Using Annoy for neighbor search, n_neighbors = 30
## 13:40:44 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 13:40:47 Writing NN index file to temp file C:\Users\Admin\AppData\Local\Temp\Rtmpe6F2eQ\file3edc7a0f2236
## 13:40:47 Searching Annoy index using 1 thread, search_k = 3000
## 13:40:50 Annoy recall = 100%
## 13:40:52 Commencing smooth kNN distance calibration using 1 thread
## 13:40:55 Initializing from normalized Laplacian + noise
## 13:40:56 Commencing optimization for 200 epochs, with 476818 positive edges
## 13:41:12 Optimization finished
seurat_integrated <- RunTSNE(seurat_integrated, 
                             reduction = "pca")
       # Determine the K-nearest neighbor graph
seurat_integrated <- FindNeighbors(object = seurat_integrated, 
                                   dims = 1:40)
## Computing nearest neighbor graph
## Computing SNN
# Determine the clusters for various resolutions                                
seurat_integrated <- FindClusters(object = seurat_integrated,
                                  resolution = c(0.05, 0.5))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 11424
## Number of edges: 399100
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9858
## Number of communities: 12
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 11424
## Number of edges: 399100
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9497
## Number of communities: 25
## Elapsed time: 1 seconds
#setwd("D:/Bioinformatics Projects/LindseyAanalysis/single_cell/two_brain_groups")
#saveRDS(seurat_integrated, "./v8_output/Seurat_integrated_twobraingroups.RDS")

Clustering quality control

This step gives us some idea about how is the distribution of the number of genes, number of UMIs, and percentage of mitochondrial genes in each cluster. Normally, we expect to see similar distribution of no. of genes (nFeature_RNA) and no. of UMIs (nCount_RNA).

As for the percent.mt (percentage of mitochondrial genes per cell), it can be a reference to check if those high intensity clusters might be having poor quality cells (if so, we can try to remove in the next step or adjust the metrics in the previous filtering step) or it might be due to the differences biologically

# Determine metrics to plot present in seurat_integrated@meta.data
metrics <-  c("nFeature_RNA", "nCount_RNA", "percent.mt")

FeaturePlot(seurat_integrated, 
            reduction = "umap", 
            features = metrics,
            split.by = "sample.id",
            pt.size = 0.4,
            min.cutoff = 'q10',
            label = TRUE)

additional plotting

Idents(object = seurat_integrated) <- seurat_integrated$integrated_snn_res.0.05
# Idents(object = seurat_integrated) <- seurat_integrated$sample.id
# Idents(object = seurat_integrated) <- seurat_integrated$group.id

DimPlot(seurat_integrated, reduction = "tsne", label = TRUE, pt.size = 1, label.size = 6)

DimPlot(seurat_integrated, reduction = "umap", label = TRUE, pt.size = 2, label.size = 6)

PCAPlot(seurat_integrated)

# do feature plots for selected genes
select_genes = c('Trem2','Snap25','Epas1', 'Egr1', 'Npas4', 'Calcrl') 

FeaturePlot(seurat_integrated, features =select_genes, cols =c('gray','red'), reduction ='tsne', pt.size = 0.7,ncol = 3, label = T)

FeaturePlot(seurat_integrated, features =select_genes, cols =c('gray','red'), reduction = 'umap',pt.size = 0.7,ncol=3, label = T)

# do heatmaps

select_genes2                 <- c('Snap25','Gad1','Gad2', 'Slc32a1', 'Slc17a7', 'Lamp5', 'Aqp4', 'Gfap', "Ndnf", 'Sncg', 'Vip', 'Trem2','Sst','Pvalb', 'Cux2', 'Rorb', 'Fezf2', 'Foxp2', 'Npas4', 'Mbp', 'Cldn5', 'Ctss', 'C1qa')
cellRanks                <- seurat_integrated@meta.data[order(seurat_integrated@meta.data$integrated_snn_res.0.05),] # check cellRanks structure!
PartialMatrix            <- seurat_integrated@assays$integrated@scale.data[which(rownames(seurat_integrated@assays$integrated@scale.data) %in% select_genes2), rownames(cellRanks)]
cellRanks$col            <- rainbow(max(as.numeric(cellRanks$integrated_snn_res.0.05))+1)[as.numeric(cellRanks$integrated_snn_res.0.5)+1]
#
ha_column <- HeatmapAnnotation(
  df  = data.frame(
    ClusterID = as.numeric(cellRanks$integrated_snn_res.0.05)
  ),
  col = list(
    ClusterID = colorRamp2(unique(as.numeric(cellRanks$integrated_snn_res.0.05)), 
                           rainbow(max(as.numeric(cellRanks$integrated_snn_res.0.05))))
  )
)

ht1 = Heatmap(PartialMatrix, name = "Scaled\nUMI", column_title = "Allen SMARTseq dataset genes", 
              top_annotation = ha_column, show_column_names=FALSE, cluster_columns = FALSE,
              row_names_gp = gpar(fontsize = 16))
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 columns You can control `use_raster` argument by explicitly
## setting TRUE/FALSE to it.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
# do not use TRUE for clustering, too many cells cannot see clearly
ht1

# ht1

# heatmap, without gene name cluster, adopt listed gene sequences
DoHeatmap(seurat_integrated, features = select_genes2)

DoHeatmap(seurat_integrated, features = select_genes2, group.by = 'group.id' )

#DoHeatmap(seurat_integrated, cells = 1:100, features = select_genes2, group.by = 'group.id' )

DoHeatmap(seurat_integrated, features = select_genes2, group.by = 'seurat_clusters' )

# additiontal plots, adjusting format

Idents(object = seurat_integrated) <- seurat_integrated$integrated_snn_res.0.5

DimPlot(seurat_integrated, label = T, pt.size = 1, label.size = 6)

plot <- VlnPlot(seurat_integrated, assay = "RNA", features = c("Gad1","Trem2"), combine = FALSE, fill.by = c("feature","ident")) 
plot <-  lapply(
  X = plot,
  FUN = function(p) p + aes(color= seurat_integrated$integrated_snn_res.0.5)
)
# plot$layers[[2]]$aes_params$alpha <- 0.1
CombinePlots(plots = plot, legend = 'none')
## Warning: CombinePlots is being deprecated. Plots should now be combined using
## the patchwork system.

#p0 <- VlnPlot(seurat_integrated, "Gad1", assay = "RNA", pt.size = 0.2)
#p1 <- VlnPlot(seurat_integrated, "Gad1", assay = "RNA", )
#p1$layers[[2]]$aes_params$alpha <- 0.1
#p0+p1

additional custermized heatmap plot

Idents(object = seurat_integrated) <- seurat_integrated$integrated_snn_res.0.05
# or for finer clusters:
# Idents(object = seurat_integrated) <- seurat_integrated$integrated_snn_res.0.5

DoHeatmap(seurat_integrated, features = select_genes2, disp.min = -1,
          slot = 'scale.data', 
          group.colors = rainbow(9)) +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red",midpoint = 1)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

mapal <- colorRampPalette(RColorBrewer::brewer.pal(11,"RdBu"))(256)
DoHeatmap(seurat_integrated, features = select_genes2, angle = 90, size = 3) + scale_fill_gradientn(colours = rev(mapal))
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

# trial DEGs and GSEA analysis for a cluster. Note: the following block can run, but it takes long time and i am not sure it means anythig…so let’s skip this block.

# Idents(object = seurat_integrated) <- seurat_integrated$integrated_snn_res.0.05

# PG.markers.7 <- FindMarkers(seurat_integrated, ident.1 = "0", min.pct = 0.20) # we will adopt the variable name .7, but to represent cluster 0.
#PG.markers.7$gene <- rownames(PG.markers.7)
#library(biomaRt)

#gene_id <- getBM(attributes = c("ensembl_gene_id","external_gene_name","entrezgene_id"), mart = useDataset("mmusculus_gene_ensembl", useMart("ensembl"))) # takes longer time
#PG.markers.7 <- merge(PG.markers.7, gene_id[,c(2,3)], by.x = "gene", by.y = "external_gene_name")
#PG.markers.7.filt <- PG.markers.7
#PG.markers.7.filt <- PG.markers.7.filt[!duplicated(PG.markers.7.filt$entrezgene_id),]
#PG.markers.7.filt <- PG.markers.7[which(PG.markers.7$p_val_adj < 0.05),]
#genelist <- PG.markers.7$avg_log2FC
#names(genelist) <-  PG.markers.7$entrezgene_id
#genelist <- sort(genelist, decreasing = TRUE)

#library(fgsea)
#library(msigdbr)
#library(data.table)
#library(tidyverse)

#m_df <- msigdbr(species = "Mus musculus", category = "C2") %>% dplyr::select(gs_name, entrez_gene)
#m_df_fgsea <- split(x = m_df$entrez_gene, f = m_df$gs_name)

#fgseaRes <- fgsea(pathways = m_df_fgsea, 
                  #stats    = genelist,
                  #eps      = 0.05,
                  #minSize  = 15,
                  #maxSize  = 800) # takes longer ~40min for cluster0 genes
#selected <- fgseaRes[c(979, 329, 443, 445, 907),]
#selected <- selected$pathway

#png(file = "./v8_output/fgsea table.png", width = 800, height = 500)
#plotGseaTable(m_df_fgsea[selected], genelist, fgseaRes, 
             # gseaParam=0.5)
#dev.off()

Find all markers in two samples for cell type identification

Idents(seurat_integrated)<- "seurat_clusters"

cluster1.markers <- FindMarkers(seurat_integrated, ident.1 = 1, min.pct = 0.25, only.pos = TRUE) #compare cluster1 with all the rest of clusters
head(cluster1.markers, n = 5)
##        p_val avg_log2FC pct.1 pct.2 p_val_adj
## Apoe       0   3.822062 1.000 0.196         0
## Aldoc      0   5.758969 0.998 0.102         0
## Cst3       0   3.490138 0.999 0.152         0
## Atp1a2     0   3.423945 1.000 0.160         0
## Mt1        0   5.716655 0.997 0.137         0
VlnPlot(seurat_integrated, assay = "RNA", features = row.names(cluster1.markers)[1:15], pt.size = 0, stack = TRUE, flip = TRUE)

VlnPlot(seurat_integrated, assay = "RNA", features = row.names(cluster1.markers)[1:5], stack = TRUE, flip = TRUE, split.by = "group.id", split.plot = TRUE)
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

#p1 <- VlnPlot(seurat_integrated, assay = "RNA", features = row.names(cluster1.markers)[1:5], pt.size = 0, stack = TRUE, flip = TRUE)
## p1$layers[[1]]$aes_params$alpha <- 0.1 #does not work
#p1

cluster2.markers <- FindMarkers(seurat_integrated, ident.1 = 2, ident.2 = c(3,4), min.pct = 0.25, only.pos = TRUE) #compare cluster2 with clusters 3 and 4
VlnPlot(seurat_integrated, assay = "RNA", features = row.names(cluster2.markers)[1:15], stack = TRUE, flip = TRUE)

# for all markers-higher resolution

Qiu.markers <-  FindAllMarkers(seurat_integrated,
                                    only.pos=TRUE, min.pct=0.25, logfc.threshold=0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
## Calculating cluster 10
## Calculating cluster 11
## Calculating cluster 12
## Calculating cluster 13
## Calculating cluster 14
## Calculating cluster 15
## Calculating cluster 16
## Calculating cluster 17
## Calculating cluster 18
## Calculating cluster 19
## Calculating cluster 20
## Calculating cluster 21
## Calculating cluster 22
## Calculating cluster 23
## Calculating cluster 24
write.csv(Qiu.markers,paste0(wd$output,"Qiu_all_markers_20220226.csv"))
head(Qiu.markers,10)
##                 p_val avg_log2FC pct.1 pct.2     p_val_adj cluster    gene
## Snap25   0.000000e+00  3.2560060 0.821 0.275  0.000000e+00       0  Snap25
## Nrgn     0.000000e+00  3.0582446 0.714 0.247  0.000000e+00       0    Nrgn
## Ywhaz    0.000000e+00  2.8312954 0.801 0.246  0.000000e+00       0   Ywhaz
## Ywhah    0.000000e+00  2.7537510 0.804 0.310  0.000000e+00       0   Ywhah
## Calm2    0.000000e+00  2.0530944 0.787 0.304  0.000000e+00       0   Calm2
## Camk2n1  0.000000e+00  1.8180151 0.712 0.250  0.000000e+00       0 Camk2n1
## Cck      0.000000e+00  0.8036294 0.702 0.220  0.000000e+00       0     Cck
## Basp1   4.240489e-258  1.6305991 0.711 0.312 1.272147e-254       0   Basp1
## Ppp3ca  1.347563e-252  2.0267032 0.708 0.303 4.042689e-249       0  Ppp3ca
## Chn1    9.230218e-252  2.2142401 0.714 0.330 2.769065e-248       0    Chn1
y <- Qiu.markers %>% group_by(cluster) %>% top_n(n = 1, wt = avg_log2FC)
VlnPlot(seurat_integrated, assay = "RNA", features = y$gene, stack = TRUE)

v3 <- VlnPlot(seurat_integrated, assay = "RNA", features = "Foxp2")
#v3 <- VlnPlot(seurat_integrated, assay = "RNA", features = c("Foxp2", "Kcnab3"))
v3$layers[[2]]$aes_params$alpha <- 0.1
v3

# VlnPlot(seurat_integrated, features = y$gene[1:10], stack = TRUE)

FeaturePlot(seurat_integrated, features = y$gene[1:9]) # top gene marker for the first 9 cluster

FeaturePlot(seurat_integrated, features = y$gene[1:2], blend = TRUE)

top10 <- Qiu.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
DoHeatmap(seurat_integrated, features = top10$gene) + NoLegend()

# repeat for lower resolution clusters

Idents(object = seurat_integrated) <- seurat_integrated$integrated_snn_res.0.05
cluster1.markers.low.res <- FindMarkers(seurat_integrated, ident.1 = 1, min.pct = 0.25, only.pos = TRUE) #compare cluster1 with all the rest of clusters
head(cluster1.markers.low.res, n = 5)
##         p_val avg_log2FC pct.1 pct.2 p_val_adj
## Meg3        0  2.9960808 0.735 0.310         0
## Snhg11      0  2.2490148 0.830 0.314         0
## Slc17a7     0  0.6629423 0.786 0.310         0
## R3hdm1      0  8.1347943 0.735 0.252         0
## Atp2b2      0  5.4205087 0.783 0.288         0
VlnPlot(seurat_integrated, assay="RNA", features = row.names(cluster1.markers.low.res)[1:25], pt.size = 0, stack = TRUE, flip = TRUE)

cluster2.markers.low.res <- FindMarkers(seurat_integrated, ident.1 = 2, ident.2 = c(0,4), min.pct = 0.25, only.pos = TRUE) #compare cluster2 with clusters 0 and 4
head(cluster2.markers.low.res)
##        p_val avg_log2FC pct.1 pct.2 p_val_adj
## Apoe       0   4.620273 1.000 0.120         0
## Aldoc      0   5.749845 0.998 0.129         0
## Cst3       0   5.363414 0.998 0.089         0
## Atp1a2     0   5.673813 0.999 0.092         0
## Mt1        0   5.715023 0.997 0.114         0
## Plpp3      0   7.071290 0.999 0.095         0
VlnPlot(seurat_integrated, assay = "RNA", features = row.names(cluster2.markers.low.res)[1:15], stack = TRUE, flip = TRUE)

VlnPlot(seurat_integrated, assay = "RNA", features = row.names(cluster2.markers.low.res)[1:15], stack = TRUE, flip = TRUE, split.by = "group.id", split.plot = TRUE)

# repeat for lower resolution clustering

Qiu.markers.low.res <-  FindAllMarkers(seurat_integrated,
                                    only.pos=TRUE, min.pct=0.25, logfc.threshold=0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
## Calculating cluster 10
## Calculating cluster 11
write.csv(Qiu.markers.low.res,paste0(wd$output,"Qiu_all_markers_low_res.csv"))
head(Qiu.markers.low.res,10)
##         p_val avg_log2FC pct.1 pct.2 p_val_adj cluster    gene
## Camk2n1     0   6.603516 0.670 0.167         0       0 Camk2n1
## Itpka       0   6.012532 0.497 0.163         0       0   Itpka
## Snap25      0   5.284015 0.793 0.168         0       0  Snap25
## Nrgn        0   5.172864 0.726 0.140         0       0    Nrgn
## Nrn1        0   5.054037 0.722 0.159         0       0    Nrn1
## Lmo4        0   4.579140 0.694 0.196         0       0    Lmo4
## Map1b       0   4.499258 0.675 0.272         0       0   Map1b
## Hpca        0   4.489036 0.661 0.203         0       0    Hpca
## Snca        0   4.299523 0.753 0.190         0       0    Snca
## Nrsn1       0   4.278136 0.551 0.206         0       0   Nrsn1
y <- Qiu.markers.low.res %>% group_by(cluster) %>% top_n(n = 1, wt = avg_log2FC)
v2 <- VlnPlot(seurat_integrated, assay = "RNA", features = y$gene, stack = TRUE, flip = TRUE)
v2

v3 <- VlnPlot(seurat_integrated, assay = "RNA", features = c("Foxp2", "Snap25"))
v3$layers[[2]]$aes_params$alpha <- 0.1

v3

# VlnPlot(seurat_integrated, features = y$gene[1:10], stack = TRUE)

v4 <- FeaturePlot(seurat_integrated, features = y$gene[1:9]) # top gene marker for the first 9 cluster

v4

top10 <- Qiu.markers.low.res %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
v5 <- DoHeatmap(seurat_integrated, features = top10$gene) + NoLegend()

v5

Identifying cell type

Option 1: SingleR package with built-in reference

I use a collection of mouse bulk RNA-seq data sets obtained from celldex package (Benayoun et al. 2019). A variety of cell types are available, mostly from blood but also covering several other tissues. This identifies marker genes from the reference and uses them to compute assignment scores (based on the Spearman correlation across markers) for each cell in the test dataset against each label in the reference. The label with the highest score is the assigned to the test cell, possibly with further fine-tuning to resolve closely related labels.

This reference consists of a collection of mouse bulk RNA-seq data sets downloaded from the gene expression omnibus (Benayoun et al. 2019). A variety of cell types are available, again mostly from blood but also covering several other tissues.

#BiocManager::install("SingleR")
#BiocManager::install("celldex")
Idents(seurat_integrated)<- "seurat_clusters"

ref <- MouseRNAseqData()
## snapshotDate(): 2022-04-26
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
table(Idents(object = seurat_integrated))
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
## 1599 1399 1120  691  662  625  621  543  456  435  427  395  390  373  297  198 
##   16   17   18   19   20   21   22   23   24 
##  194  174  174  160  159  109   78   74   71
hpca.se <- SingleR(test = seurat_integrated@assays$RNA@data, ref = ref,
                    labels = ref$label.main)

p1 <- plotScoreHeatmap(hpca.se)
p1

plotDeltaDistribution(hpca.se, ncol = 3)
## Warning: Groups with fewer than two data points have been dropped.
## Warning: The following aesthetics were dropped during statistical transformation: x, y,
## PANEL, group
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## Warning in max(data$density): no non-missing arguments to max; returning -Inf
## Warning: Computation failed in `stat_ydensity()`
## Caused by error in `$<-.data.frame`:
## ! replacement has 1 row, data has 0

all.markers <- metadata(hpca.se)$de.genes
seurat_integrated$labels_hpca <- hpca.se$labels
# Extract identity and sample information from seurat object to determine the number of cells per cluster per sample
n_cells <- FetchData(seurat_integrated, 
                     vars = c("integrated_snn_res.0.5",
                              "labels_hpca")) %>%
  dplyr::count(integrated_snn_res.0.5,
               labels_hpca) %>%
  tidyr::spread(integrated_snn_res.0.5, n)

# View(n_cells)
write.csv(n_cells, paste0(wd$output,"n_cells_mouseRNAseqData_twobraingroups.csv"))


## comparing with the unsupervised clustering
tab <- table(cluster= seurat_integrated$integrated_snn_res.0.5, label= hpca.se$labels) 

p3 <- pheatmap::pheatmap(log10(tab+10))

Option 2: manual annotation

refer to the seurat_integrated original script

Idents(object = seurat_integrated) <- seurat_integrated$integrated_snn_res.0.5

Plotting Astrocyte Markers

VlnPlot(object = seurat_integrated, assay = "RNA", features =c("Aqp4","Prdx6","Pla2g7"))

Plotting Microglia Markers

VlnPlot(object = seurat_integrated, assay = "RNA", features =c("C1qc","Ly86", "Tmem119"))

Plotting Endothelial Markers

VlnPlot(object = seurat_integrated, assay = "RNA", features =c("Ly6a",  "Ly6c1",  "Pltp"))

Plotting Oligodendrocyte Markers

VlnPlot(object = seurat_integrated, assay = "RNA", features =c("Mag",  "Mbp",  "Cldn11"))

Plotting Glutamatergic Neuron Markers

VlnPlot(object = seurat_integrated, assay = "RNA", features =c("Nrgn", "Sv2b",  "Snap25"))

Plotting Gabaergic Neuron Markers

VlnPlot(object = seurat_integrated, assay = "RNA", features =c("Gad1",  "Gad2",  "Rab3b" ))

Plotting Oligodendrocyte Precursor Markers

VlnPlot(object = seurat_integrated, assay = "RNA", features =c("Pdgfra",  "Cspg4",  "Gm2a"))

Rename cluster based on the SingleR results and manual annotation

seurat_integrated <- RenameIdents(object = seurat_integrated, 
                         "0" = "Neurons(Glutamatergic)",
                         "1" = "Astrocytes",
                         "2" = "Endothelial cells",
                         "3" = "Neurons(Glutamatergic)",
                         "4" = "Microglia",
                         "5" = "Neurons(Glutamatergic)",
                         "6" = "Oligodendrocytes",
                         "7" = "Neurons(Glutamatergic)",
                         "8" = "Neurons(Glutamatergic)",
                         "9" = "OPC_precursor",
                         "10" = "Neurons(Gabaergic)",
                         "11" = "Neurons(Glutamatergic)",
                         "12" = "Neurons(Gabaergic)",
                         "13" = "Neurons(Glutamatergic)",
                         "14" = "Neurons(Glutamatergic)",
                         "15" = "Endothelial cells",
                         "16" = "Endothelial cells",
                         "17" = "Neurons(Glutamatergic)",
                         "18" = "Astrocytes",
                         "19" = "Neurons(Glutamatergic)",
                         "20"= "Astrocytes",
                         "21"= "Neurons(Glutamatergic)",
                         "22"= "Oligodendrocytes",
                         "23"= "Endothelial cells",
                         "24"= "Endothelial cells"                                                  )
seurat_integrated$celltype_LC <- seurat_integrated@active.ident

DimPlot(seurat_integrated,
                reduction = "umap",
                group.by = "integrated_snn_res.0.5",
                label = TRUE,
)

DimPlot(seurat_integrated,
                reduction = "umap",
                group.by = "integrated_snn_res.0.5",
                split.by = "sample.id",
                label = TRUE,
)

Idents(object = seurat_integrated) <- "celltype_LC"

plot <- DimPlot(seurat_integrated,
                reduction = "umap",
                group.by = "celltype_LC",
                label = FALSE,
)
plot$data$seurat_clusters <- seurat_integrated@meta.data$integrated_snn_res.0.5
all(rownames(plot$data) == rownames(seurat_integrated@meta.data))
## [1] TRUE
LabelClusters(plot, id = "seurat_clusters")

Idents(object = seurat_integrated) <- "celltype_LC"
plot <- DimPlot(seurat_integrated,
                reduction = "umap",
                group.by = "celltype_LC",
                split.by = "sample.id",
                label = FALSE,
) #split.by = "group.id"
plot$data$seurat_clusters <- seurat_integrated@meta.data$integrated_snn_res.0.5
all(rownames(plot$data) == rownames(seurat_integrated@meta.data))
## [1] TRUE
LabelClusters(plot, id = "seurat_clusters")

saveRDS(seurat_integrated, paste0(wd$output,"seurat_integrated_twobraingroups.RDS"))

Differential expression Analysis for groups

Idents(seurat_integrated) <- "group.id"
groups <- unique(Idents(seurat_integrated)) # -only two groups, WT KO

pairwise <- combn(groups, 2)

results <- list()
# one comparison group WT and KO that takes 25 min
for(i in 1:ncol(pairwise)) {
  markers <- FindMarkers(seurat_integrated, ident.1 = pairwise[1, i], ident.2 = pairwise[2, i], logfc.threshold = 0)
  comparisons <- pairwise[, i]
  markers$comparison <- paste(comparisons[1], comparisons[2], sep = '_')
  results[[i]] <- markers
}

results.df <- do.call(rbind, results)
saveRDS(results.df, paste0(wd$output,"between_group_comp.rds"))
Idents(seurat_integrated)<- "group.id"

clusters <- levels(seurat_integrated$celltype_LC)

groups <- unique(Idents(seurat_integrated))
pairwise <- combn(groups, 2)

full_results<-data.frame()
for (k in 1:length(clusters)){
  results <- list()
  seurat_integrated.subset <- subset(seurat_integrated, subset= celltype_LC == clusters[k])
  for(i in 1:ncol(pairwise)) {
    markers <- FindMarkers(seurat_integrated.subset, ident.1 = pairwise[1, i], ident.2 = pairwise[2, i], logfc.threshold = 0)
    comparisons <- pairwise[, i]
    markers$comparison <- paste(comparisons[1], comparisons[2], sep = '_')
    markers$genes<- rownames(markers)
    results[[i]] <- markers
  }
  results.df <- do.call(rbind, results)
  results.df$Cluster<- clusters[k]
  full_results<-rbind(full_results,results.df)
}
  
# full_results
saveRDS(full_results, paste0(wd$output,"full_results.rds"))

write.csv(full_results, paste0(wd$output,"full_results_marker_comp_by_clusters.csv"))

SQ: not sure if we need to do this, manual annotation already has great results

Option 3: manual annotation for 25 clusters

Refer to Tasic et al, Nature 2018, marker list # https://github.com/AllenInstitute/tasic2018analysis/blob/master/RNA-seq%20Analysis/markers.R

Inh.markers <- list(c("Gad2","Slc32a1","Prox1","Adarb2","Nfix","Nfib","Cacna2d1",
                  "Cxcl14","Tnfaip8l3","Cplx3","Lamp5","Cd34","Pax6","Krt73",
                  "Scrg1","Egln3","Ndnf","Tmem182","Ntn1","Pde11a","Pdlim5",
                  "Lsp1","Slc35d3","Nkx2-1","Serpinf1","Col14a1","Vip","Sncg",
                  "Crabp1","Slc10a4","Cldn10","Bhlhe22","Crispld2","Slc17a8",
                  "Cyb5r2","Nr1h4","Wnt7b","Prss12","Igfbp6","Calb2","Grpr",
                  "Pthlh","Elfn1","Rspo1","Slc18a3","Lmo1","Rspo4","Sostdc1",
                  "Chat","Cbln4","Gsx2","Gpc3","Mab21l1","C1ql1","Itih5","Mybpc1",
                  "Myl1","Lhx6","Sox6","Sst","Chodl","Calb1","Cbln4","Etv1","Edn1",
                  "Kl","Il1rapl2","Myh8","Ptprk","Chrna2","Myh13","Ptgdr","Crhr2",
                  "Hpse","Igsf9","C1ql3","Tacstd2","Th","Col6a1","Nts","Tac1","Pvalb",
                  "Gabrg1","Il7","Bche","Prdm8","Syt2","Ostn","Pdlim3","C1ql1",
                  "Gpr149","Vipr2","Meis2","Adamts19","Cpa6","Lgr6"))
Inh.comb.markers <- c("Reln","Cnr1","Nr2f2","Cck","Npy","Crh","Tac2")

Ex.markers <- list(unique(c("Slc17a7","Rtn4rl2","Slc30a3","Cux2","Stard8","Otof","Rrad",
                            "Penk","Agmat",
                      "Emx2","S100a3","Macc1","Rorb","Scnn1a","Whrn","Endou","Col26a1",
                      "Rspo1","Fezf2","Hsd11b1","Batf3","Arhgap25","Colq","Pld5","Olfr78",
                      "Tcap","Fgf17","Wfdc18","Wfdc17","Aldh1a7","Tgfb1","Ctsc","Rxfp2",
                      "Prss35","Rgs12","Osr1","Oprk1","Cd52","Col23a1","Col18a1","Car1",
                      "Car3","Fam84b","Chrna6","Chrnb3","Fn1","Tac1","Lce3c","Erg",
                      "Cdc42ep5","Bmp5","Pvalb","Depdc7","Stac","C1ql2","Ptgfr","Slco2a1",
                      "Pappa2","Dppa1","Npsr1","Htr2c","Hpgd","Nxph3","Sla2","Tshz2",
                      "Rapgef3","Slc17a8","Trh","Nxph2","Foxp2","Col12a1","Syt6","Col5a1",
                      "Gpr139","Ly6d","Sla","Cpa6","Ppp1r18","Faim3","Ctxn3","Nxph4",
                      "Cplx3","Ctgf","Col8a1","Mup5","Ngf","Fam150a","F2r","Serpinb11","Fbxl7",
                      "P2ry12","Crh","Kynu","Hsd17b2","Mup3","Tlcd1","Lhx5","Trp73","Cpa6",
                      "Gkn1","Col18a1","Lce3c","Erg","Bmp5","Stac","C1ql2","Slco2a1","Lrrc9",
                      "Trhr","Myzap","Krt80","H60b","Fam150a","Clic5","Kcnj5","Olfr110",
                      "Olfr111")))
Ex.comb.markers <- c("Reln","Cdh13","Cpne7","Alcam","Rprm","Marcksl1")
                            

Global.markers <- list(c("Fez1","Phyhipl","Aplp1","Gnao1","Caly","Snap25","Atp1a3","Camk2b",
                    "Syt1","Gabrg2","Fabp3","Stmn2","Kif5c","Slc32a1","Gad2","Dlx1","Dlx5",
                    "Dlx2","Dlx6os1","Slc6a1","Sox2","Slc17a7","Nrn1","Neurod2","Sv2b","Satb2",
                    "Tbr1","Vsig2","Cmtm5","Kcnj10","S100a16","S100a13","S1pr1","Gja1","Gjb6",
                    "Aqp4","Lcat","Acsbg1","Olig1","Sox10","Neu4","Sapcd2","Gpr17","Plp1",
                    "Cldn11","Mag","Mog","Nkx6-2","Enpp6","9630013A20Rik","Brca1",
                    "Mog","Opalin","Gjb1","Hapln2","Cyba","Ctsh","Ifitm3","Sparc",
                    "S100a11","Dcn","Col1a1","Pltp","Vtn","Slc6a13","Spp1","Slc13a3",
                    "Col15a1","Slc47a1","Tgtp2","Ifi47","Esam","Slco1a4","Slc38a5",
                    "Cldn5","H2-Q7","Slc38a11","Art3","Ace2","Acta2","Myh11","Pln",
                    "Gja5","Kcnj8","Atp13a5","Aoc3","Ctss","C1qb","C1qc","C1qa","Cbr2",
                    "F13a1","Pf4","Mrc1","Siglech","Selplg"))

seurat_integrated <- AddModuleScore(object = seurat_integrated, 
                           features = Ex.markers, 
                           name = "Excitatory_marker_score")
## Warning: The following features are not present in the object: Rtn4rl2, Agmat,
## Macc1, Scnn1a, Col26a1, Arhgap25, Colq, Olfr78, Fgf17, Wfdc17, Aldh1a7, Rxfp2,
## Prss35, Car1, Car3, Fam84b, Chrna6, Chrnb3, Lce3c, Cdc42ep5, Depdc7, Stac,
## Ptgfr, Slco2a1, Pappa2, Dppa1, Npsr1, Sla2, Trh, Nxph2, Col5a1, Gpr139, Cpa6,
## Faim3, Ctxn3, Nxph4, Col8a1, Mup5, Fam150a, F2r, Serpinb11, Fbxl7, Kynu,
## Hsd17b2, Mup3, Lhx5, Trp73, Gkn1, Lrrc9, Myzap, Krt80, H60b, Kcnj5, Olfr110,
## Olfr111, not searching for symbol synonyms
FeaturePlot(object = seurat_integrated, features = "Excitatory_marker_score1", label = T) + 
  ggtitle("FeaturePlot_Excitatory marker score (refer to Tasic et al., 2018, Nature)")

seurat_integrated <- AddModuleScore(object = seurat_integrated, 
                           features = Inh.markers, 
                           name = "Inhibitory_marker_score")
## Warning: The following features are not present in the object: Nfix, Tnfaip8l3,
## Krt73, Tmem182, Slc35d3, Nkx2-1, Crabp1, Slc10a4, Cyb5r2, Nr1h4, Prss12, Grpr,
## Slc18a3, Rspo4, Sostdc1, Chat, Gsx2, Mab21l1, Mybpc1, Myl1, Chodl, Kl, Il1rapl2,
## Myh8, Ptprk, Chrna2, Myh13, Ptgdr, Crhr2, Hpse, Igsf9, Tacstd2, Th, Il7, Prdm8,
## Ostn, Gpr149, Vipr2, Adamts19, Cpa6, not searching for symbol synonyms
FeaturePlot(object = seurat_integrated, features = "Inhibitory_marker_score1", label = T) + 
  ggtitle("FeaturePlot_Inhibitory marker score (refer to Tasic et al., 2018, Nature)")

seurat_integrated <- AddModuleScore(object = seurat_integrated, 
                           features = Global.markers, 
                           name = "Global_marker_score")
## Warning: The following features are not present in the object: Gnao1, Camk2b,
## Fabp3, Kif5c, Dlx5, Satb2, Vsig2, 9630013A20Rik, Slc47a1, Tgtp2, Ifi47, Gja5,
## Aoc3, Cbr2, F13a1, Pf4, Mrc1, not searching for symbol synonyms
FeaturePlot(object = seurat_integrated, features = "Global_marker_score1", label = T) + 
  ggtitle("FeaturePlot_Global marker score (refer to Tasic et al., 2018, Nature)")

saveRDS(seurat_integrated, paste0(wd$output,"seurat_integrated_twobraingroups.RDS"))

additional plots

Qiu.ab<- subset(seurat_integrated, subset = group.id %in% c("WT", "KO"))
ab_markers<- subset(full_results, subset = comparison == "WT_KO") 
ab_markers<- subset(ab_markers, subset = p_val_adj < 0.005)
ab_markers<- ab_markers[order(-abs(ab_markers$avg_log2FC)),]
write.csv(ab_markers, paste0(wd$output,"sorted_ab_marker_all_clusters.csv"))
##
num_markers<- as.data.frame(table(ab_markers$Cluster))
num_markers<- num_markers[order(num_markers$Freq),]

p<-ggplot(data=num_markers, aes(x=Var1, y=Freq, fill=Var1)) +
  geom_bar(stat="identity")+
  theme_minimal()+
  coord_flip()+
  labs(title="Number of Differentially Expressed Genes by Cluster in AB", y="Number of Differentially Expressed Genes", x="Cell Type")+
  theme(legend.position="none")
p

##
Idents(Qiu.ab)<-'celltype_LC'
features<- unique(ab_markers$genes[1:11])
DotPlot(Qiu.ab, features = features) + RotatedAxis()

# cluster specific comparison and violin plot
ab_markers_GluN <- subset(ab_markers, subset = ab_markers$p_val_adj < 0.005& ab_markers$Cluster =="Neurons(Glutamatergic)")
ab_markers_GluN<- ab_markers_GluN[order(-abs(ab_markers_GluN$avg_log2FC)),]
head(ab_markers_GluN,20)
##                 p_val avg_log2FC pct.1 pct.2     p_val_adj comparison   genes
## Prrg1    9.466617e-08  -5.228159 0.117 0.047  2.839985e-04      WT_KO   Prrg1
## Pdlim3   4.729550e-09  -5.200669 0.182 0.018  1.418865e-05      WT_KO  Pdlim3
## Tagln    6.255062e-29   4.970390 0.107 0.028  1.876519e-25      WT_KO   Tagln
## Klk6     9.132891e-42  -4.666907 0.236 0.034  2.739867e-38      WT_KO    Klk6
## Plin4    2.272906e-28  -4.563661 0.159 0.011  6.818717e-25      WT_KO   Plin4
## Socs3    6.082897e-07  -4.499833 0.143 0.154  1.824869e-03      WT_KO   Socs3
## Fbxl22  1.425446e-139  -4.479195 0.425 0.078 4.276337e-136      WT_KO  Fbxl22
## Lamb1    6.690106e-08  -4.451595 0.173 0.071  2.007032e-04      WT_KO   Lamb1
## Sox8     6.142291e-16  -4.419788 0.137 0.088  1.842687e-12      WT_KO    Sox8
## Mustn1   1.831936e-60  -4.376585 0.254 0.146  5.495808e-57      WT_KO  Mustn1
## Emx2os   4.476503e-22  -4.374314 0.203 0.096  1.342951e-18      WT_KO  Emx2os
## Fbln5    7.626148e-07  -4.331924 0.103 0.024  2.287844e-03      WT_KO   Fbln5
## Bicc1    1.033198e-09  -4.306721 0.208 0.110  3.099593e-06      WT_KO   Bicc1
## Wnt3     2.434513e-15  -4.277500 0.137 0.034  7.303538e-12      WT_KO    Wnt3
## Lcp2     7.540279e-12  -4.257663 0.205 0.163  2.262084e-08      WT_KO    Lcp2
## Gm15440  2.556962e-11   4.170513 0.227 0.208  7.670886e-08      WT_KO Gm15440
## Slc9a2   1.903271e-16  -4.170395 0.128 0.035  5.709813e-13      WT_KO  Slc9a2
## Tox3     1.358598e-11   4.167284 0.130 0.106  4.075794e-08      WT_KO    Tox3
## Ugdh     3.897508e-08  -4.067804 0.164 0.155  1.169252e-04      WT_KO    Ugdh
## Dlx2     4.111044e-17   4.063206 0.142 0.020  1.233313e-13      WT_KO    Dlx2
##                        Cluster
## Prrg1   Neurons(Glutamatergic)
## Pdlim3  Neurons(Glutamatergic)
## Tagln   Neurons(Glutamatergic)
## Klk6    Neurons(Glutamatergic)
## Plin4   Neurons(Glutamatergic)
## Socs3   Neurons(Glutamatergic)
## Fbxl22  Neurons(Glutamatergic)
## Lamb1   Neurons(Glutamatergic)
## Sox8    Neurons(Glutamatergic)
## Mustn1  Neurons(Glutamatergic)
## Emx2os  Neurons(Glutamatergic)
## Fbln5   Neurons(Glutamatergic)
## Bicc1   Neurons(Glutamatergic)
## Wnt3    Neurons(Glutamatergic)
## Lcp2    Neurons(Glutamatergic)
## Gm15440 Neurons(Glutamatergic)
## Slc9a2  Neurons(Glutamatergic)
## Tox3    Neurons(Glutamatergic)
## Ugdh    Neurons(Glutamatergic)
## Dlx2    Neurons(Glutamatergic)
Idents(seurat_integrated) <- seurat_integrated$integrated_snn_res.0.5
# Idents(seurat_integrated) <- "celltype_LC"
v1 <- VlnPlot(seurat_integrated, assay = "RNA", features = row.names(ab_markers_GluN)[1:15], stack = TRUE, flip = TRUE, split.by = "group.id", split.plot = TRUE)
v1

Idents(seurat_integrated) <- "celltype_LC"
v2 <- VlnPlot(seurat_integrated, assay = "RNA", features = c("Unc93b1", "Fscn1", "Enpp2", "Prrg1", "Cdkn1a", "Rfx4", "Gypc", "Ddah2", "Gad1", "Sst", "Cd83", "Lyz2", "Tcf4", "Egr1", "Sox8", "Foxp2", "Met", "Hgf", "Map2k1"), stack = TRUE, flip = TRUE, split.by = "group.id", split.plot = TRUE)

v2

Idents(seurat_integrated) <- seurat_integrated$group.id
v3 <- VlnPlot(seurat_integrated, assay = "RNA", features = c("Unc93b1", "Fscn1", "Enpp2", "Prrg1", "Cdkn1a", "Rfx4", "Gypc", "Ddah2", "Gad1", "Sst", "Cd83", "Lyz2", "Tcf4", "Egr1", "Sox8", "Foxp2", "Met", "Hgf", "Map2k1"), stack = TRUE, flip = TRUE)
v3

###########################
#v1 <- VlnPlot(Qiu.ab, features = features, stack = TRUE, flip = TRUE)
#ggsave("./v8_output/top_WT_KO_DEG.pdf", width=8, height = 10)
#rm(v1)
###########################

Idents(Qiu.ab)<-'celltype_LC'
#current.cluster.ids <- c("Glutamatergic","Astrocyte","Endothelial","Microglia","Oligodendrocyte Precursor","Gabaergic","Pericyte","Oligodendrocyte","Endothelial+Pericyte")
#new.cluster.ids <- c("Gl_N","A","EC","M","O PC","Gb_N","P","O","EC+P")
#Qiu.ab@active.ident <- plyr::mapvalues(x = Qiu.ab@active.ident, from = current.cluster.ids, to = new.cluster.ids)
#Qiu.ab@meta.data$cell.type.abbr <- Qiu.ab@active.ident

DoHeatmap(subset(Qiu.ab, downsample = 100), features = unique(ab_markers$genes)[1:100], size = 3)+
      theme(axis.text.y = element_text(size = 5), text=element_text(size=20))

# library(monocle3)
# library(SeuratWrappers) 

#data <- GetAssayData(seurat_integrated)[VariableFeatures(seurat_integrated),]
#pd <- data.frame(seurat_integrated@meta.data)
#fData <- data.frame(gene_short_name = rownames(data), row.names = row.names(data))
#cds <- new_cell_data_set(expression_data = data,
                       # cell_metadata = pd,
                       # gene_metadata = fData)
#cds <- preprocess_cds(cds, num_dim = 12,method = "PCA")
#plot_pc_variance_explained(cds)
#cds <- reduce_dimension(cds,
                        #preprocess_method = "PCA",
                        #reduction_method= "tSNE",
                        #verbose = T)
#cds <- cluster_cells(cds = cds , reduction_method = "tSNE", verbose = TRUE,
                     #resolution = NULL)
#p1 <- plot_cells(cds, reduction_method = "tSNE")
##compare with seurat data, res = 0.5
#cds@clusters$tSNE$clusters <- seurat_integrated$integrated_snn_res.0.5
#p2 <- plot_cells(cds, reduction_method = "tSNE")
#p1 + p2

visualization using ShinnyCell

#```{r} library(ShinyCell)

setwd(wd\(output) #setwd("D:/Bioinformatics Projects/LindseyAanalysis/single_cell/two_brain_groups/v8_output") # getExampleData() # Download example dataset (~200 MB) no need to run this if you run it before, with the .rds file in working directory seurat_integrated = readRDS(paste0(wd\)output,“seurat_integrated_twobraingroups.RDS”)) scConf = createConfig(seurat_integrated) makeShinyApp(seurat_integrated, scConf, gene.mapping = TRUE, shiny.title = “four sample_two brain groups”)

rm(list = ls())

rm(list = setdiff(ls(), “plot”))

setwd(paste0(wd$output,“shinyApp”)) # verify change of dir in console using getwd()

shiny::runApp() # after you run this App, another R will be open, maximize it, then select and plot and test the graphic output. While this R window is open, the > ready in console will NOT be ready. You can also select output to browser. In order for the browser to work, you need to keep this new R window open. If you close the R window, the brower will gray out. ```